{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "86066f02-8b3e-4136-8f3d-d49c741030ab",
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "import pandas as pd\n",
    "import matplotlib.pyplot as plt"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "545fef5a-46a9-44fa-97f0-bda7144346ab",
   "metadata": {},
   "source": [
    "# 基于线性回归房价估值"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "515bf217-a543-4e91-a915-53dfeeb5e59a",
   "metadata": {},
   "source": [
    "## 1、导入数据集"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "8c391773-d1c7-4cc3-a28e-7370a1d00301",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>X1 transaction date</th>\n",
       "      <th>X2 house age</th>\n",
       "      <th>X3 distance to the nearest MRT station</th>\n",
       "      <th>X4 number of convenience stores</th>\n",
       "      <th>X5 latitude</th>\n",
       "      <th>X6 longitude</th>\n",
       "      <th>Y house price of unit area</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>No</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2012.916667</td>\n",
       "      <td>32.0</td>\n",
       "      <td>84.87882</td>\n",
       "      <td>10</td>\n",
       "      <td>24.98298</td>\n",
       "      <td>121.54024</td>\n",
       "      <td>37.9</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>2012.916667</td>\n",
       "      <td>19.5</td>\n",
       "      <td>306.59470</td>\n",
       "      <td>9</td>\n",
       "      <td>24.98034</td>\n",
       "      <td>121.53951</td>\n",
       "      <td>42.2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>2013.583333</td>\n",
       "      <td>13.3</td>\n",
       "      <td>561.98450</td>\n",
       "      <td>5</td>\n",
       "      <td>24.98746</td>\n",
       "      <td>121.54391</td>\n",
       "      <td>47.3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>2013.500000</td>\n",
       "      <td>13.3</td>\n",
       "      <td>561.98450</td>\n",
       "      <td>5</td>\n",
       "      <td>24.98746</td>\n",
       "      <td>121.54391</td>\n",
       "      <td>54.8</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>2012.833333</td>\n",
       "      <td>5.0</td>\n",
       "      <td>390.56840</td>\n",
       "      <td>5</td>\n",
       "      <td>24.97937</td>\n",
       "      <td>121.54245</td>\n",
       "      <td>43.1</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "    X1 transaction date  X2 house age  X3 distance to the nearest MRT station  \\\n",
       "No                                                                              \n",
       "1           2012.916667          32.0                                84.87882   \n",
       "2           2012.916667          19.5                               306.59470   \n",
       "3           2013.583333          13.3                               561.98450   \n",
       "4           2013.500000          13.3                               561.98450   \n",
       "5           2012.833333           5.0                               390.56840   \n",
       "\n",
       "    X4 number of convenience stores  X5 latitude  X6 longitude  \\\n",
       "No                                                               \n",
       "1                                10     24.98298     121.54024   \n",
       "2                                 9     24.98034     121.53951   \n",
       "3                                 5     24.98746     121.54391   \n",
       "4                                 5     24.98746     121.54391   \n",
       "5                                 5     24.97937     121.54245   \n",
       "\n",
       "    Y house price of unit area  \n",
       "No                              \n",
       "1                         37.9  \n",
       "2                         42.2  \n",
       "3                         47.3  \n",
       "4                         54.8  \n",
       "5                         43.1  "
      ]
     },
     "execution_count": 2,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "real_estate = pd.read_excel('../dataset/Real estate valuation data set.xlsx', index_col='No')\n",
    "real_estate.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "32812f3a-dcee-4c73-9b59-f61326d8d8e0",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<class 'pandas.core.frame.DataFrame'>\n",
      "Int64Index: 414 entries, 1 to 414\n",
      "Data columns (total 7 columns):\n",
      " #   Column                                  Non-Null Count  Dtype  \n",
      "---  ------                                  --------------  -----  \n",
      " 0   X1 transaction date                     414 non-null    float64\n",
      " 1   X2 house age                            414 non-null    float64\n",
      " 2   X3 distance to the nearest MRT station  414 non-null    float64\n",
      " 3   X4 number of convenience stores         414 non-null    int64  \n",
      " 4   X5 latitude                             414 non-null    float64\n",
      " 5   X6 longitude                            414 non-null    float64\n",
      " 6   Y house price of unit area              414 non-null    float64\n",
      "dtypes: float64(6), int64(1)\n",
      "memory usage: 25.9 KB\n"
     ]
    }
   ],
   "source": [
    "real_estate.info()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "39ea323e-9348-44ef-b2d6-e4bd378ba592",
   "metadata": {},
   "outputs": [],
   "source": [
    "data = real_estate.iloc[:, :-1].copy()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "737bacf4-b1be-4a54-8ffd-cd67b0018348",
   "metadata": {},
   "outputs": [],
   "source": [
    "target = real_estate.iloc[:, -1].copy()"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "c46ca6c6-eb37-47ee-bfb4-3164ad3a0fa2",
   "metadata": {},
   "source": [
    "## 2、划分数据集"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "62783235-16e6-4138-9c15-78de50bc19f9",
   "metadata": {},
   "outputs": [],
   "source": [
    "from sklearn.model_selection import train_test_split"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "cc25b788-6146-48e6-98dc-4fd288991bb0",
   "metadata": {},
   "outputs": [],
   "source": [
    "X_train, X_test, y_train, y_test = train_test_split(data.values, target, test_size=44, random_state=10)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "0bb484ee-7dc7-4c14-bf13-9b68ce0fe010",
   "metadata": {},
   "source": [
    "## 3、模型训练"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "id": "e13c0f69-7478-4be5-b33e-e3770fdbb6c5",
   "metadata": {},
   "outputs": [],
   "source": [
    "from sklearn.linear_model import LinearRegression"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "id": "76d58bad-bf41-4a52-b54e-77a490714068",
   "metadata": {},
   "outputs": [],
   "source": [
    "linear = LinearRegression()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "id": "4d122d6a-0d29-4f58-8227-36e2ea71ee13",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<style>#sk-container-id-1 {color: black;}#sk-container-id-1 pre{padding: 0;}#sk-container-id-1 div.sk-toggleable {background-color: white;}#sk-container-id-1 label.sk-toggleable__label {cursor: pointer;display: block;width: 100%;margin-bottom: 0;padding: 0.3em;box-sizing: border-box;text-align: center;}#sk-container-id-1 label.sk-toggleable__label-arrow:before {content: \"▸\";float: left;margin-right: 0.25em;color: #696969;}#sk-container-id-1 label.sk-toggleable__label-arrow:hover:before {color: black;}#sk-container-id-1 div.sk-estimator:hover label.sk-toggleable__label-arrow:before {color: black;}#sk-container-id-1 div.sk-toggleable__content {max-height: 0;max-width: 0;overflow: hidden;text-align: left;background-color: #f0f8ff;}#sk-container-id-1 div.sk-toggleable__content pre {margin: 0.2em;color: black;border-radius: 0.25em;background-color: #f0f8ff;}#sk-container-id-1 input.sk-toggleable__control:checked~div.sk-toggleable__content {max-height: 200px;max-width: 100%;overflow: auto;}#sk-container-id-1 input.sk-toggleable__control:checked~label.sk-toggleable__label-arrow:before {content: \"▾\";}#sk-container-id-1 div.sk-estimator input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-1 div.sk-label input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-1 input.sk-hidden--visually {border: 0;clip: rect(1px 1px 1px 1px);clip: rect(1px, 1px, 1px, 1px);height: 1px;margin: -1px;overflow: hidden;padding: 0;position: absolute;width: 1px;}#sk-container-id-1 div.sk-estimator {font-family: monospace;background-color: #f0f8ff;border: 1px dotted black;border-radius: 0.25em;box-sizing: border-box;margin-bottom: 0.5em;}#sk-container-id-1 div.sk-estimator:hover {background-color: #d4ebff;}#sk-container-id-1 div.sk-parallel-item::after {content: \"\";width: 100%;border-bottom: 1px solid gray;flex-grow: 1;}#sk-container-id-1 div.sk-label:hover label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-1 div.sk-serial::before {content: \"\";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 0;bottom: 0;left: 50%;z-index: 0;}#sk-container-id-1 div.sk-serial {display: flex;flex-direction: column;align-items: center;background-color: white;padding-right: 0.2em;padding-left: 0.2em;position: relative;}#sk-container-id-1 div.sk-item {position: relative;z-index: 1;}#sk-container-id-1 div.sk-parallel {display: flex;align-items: stretch;justify-content: center;background-color: white;position: relative;}#sk-container-id-1 div.sk-item::before, #sk-container-id-1 div.sk-parallel-item::before {content: \"\";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 0;bottom: 0;left: 50%;z-index: -1;}#sk-container-id-1 div.sk-parallel-item {display: flex;flex-direction: column;z-index: 1;position: relative;background-color: white;}#sk-container-id-1 div.sk-parallel-item:first-child::after {align-self: flex-end;width: 50%;}#sk-container-id-1 div.sk-parallel-item:last-child::after {align-self: flex-start;width: 50%;}#sk-container-id-1 div.sk-parallel-item:only-child::after {width: 0;}#sk-container-id-1 div.sk-dashed-wrapped {border: 1px dashed gray;margin: 0 0.4em 0.5em 0.4em;box-sizing: border-box;padding-bottom: 0.4em;background-color: white;}#sk-container-id-1 div.sk-label label {font-family: monospace;font-weight: bold;display: inline-block;line-height: 1.2em;}#sk-container-id-1 div.sk-label-container {text-align: center;}#sk-container-id-1 div.sk-container {/* jupyter's `normalize.less` sets `[hidden] { display: none; }` but bootstrap.min.css set `[hidden] { display: none !important; }` so we also need the `!important` here to be able to override the default hidden behavior on the sphinx rendered scikit-learn.org. See: https://github.com/scikit-learn/scikit-learn/issues/21755 */display: inline-block !important;position: relative;}#sk-container-id-1 div.sk-text-repr-fallback {display: none;}</style><div id=\"sk-container-id-1\" class=\"sk-top-container\"><div class=\"sk-text-repr-fallback\"><pre>LinearRegression()</pre><b>In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. <br />On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.</b></div><div class=\"sk-container\" hidden><div class=\"sk-item\"><div class=\"sk-estimator sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-1\" type=\"checkbox\" checked><label for=\"sk-estimator-id-1\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">LinearRegression</label><div class=\"sk-toggleable__content\"><pre>LinearRegression()</pre></div></div></div></div></div>"
      ],
      "text/plain": [
       "LinearRegression()"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "linear.fit(X_train, y_train)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "74551830-a341-4444-8d3b-b2ee5ea47169",
   "metadata": {},
   "source": [
    "## 4、模型评估"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "id": "c4d7afb6-ef39-4ca7-808e-94beafe79025",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.7346827325793779"
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "linear.score(X_test, y_test)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "802e7bf2-c1c6-41d9-8bc6-f1c14aa861d9",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.7"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
